package com.tl.spark.scala

import org.apache.spark.{SparkConf, SparkContext}


/**
  * @program: spark-test
  * @description:
  * @author: dong.tl
  * @create: 2019-08-29 08:57
  **/
/**
  * Minimal Spark-on-YARN smoke test: reads a text file from HDFS,
  * prints its line count, then prints its contents on the driver.
  *
  * Side effects: sets the process-wide HADOOP_USER_NAME property,
  * submits a YARN application, reads from HDFS, writes to stdout.
  */
object SparkReaderHDFS {
  def main(args: Array[String]): Unit = {

    // Impersonate the "hdfs" superuser for HDFS access.
    System.setProperty("HADOOP_USER_NAME", "hdfs")

    // Build the whole configuration in one chain (SparkConf setters return `this`).
    // NOTE(review): the "yarn-client" master URL is deprecated since Spark 2.0 —
    // prefer setMaster("yarn") with deploy-mode "client" when upgrading.
    val sparkConf = new SparkConf()
      .setMaster("yarn-client")
      .setAppName("YarnTest")
      // HDP stack-version placeholder must be substituted on both driver and AM,
      // otherwise YARN container launch fails resolving ${hdp.version} paths.
      .set("spark.driver.extraJavaOptions", "-Dhdp.version=3.1.0.0-78")
      .set("spark.yarn.am.extraJavaOptions", "-Dhdp.version=3.1.0.0-78")
      // Use Spark jars pre-staged on HDFS instead of uploading them on every submit.
      .set("spark.yarn.jars", "hdfs://db03:8020/user/spark/share/lib/*.jar")
      // Ship the locally-built application jar so executors can load this class.
      .setJars(Seq("F:\\IdeaProjects\\spark-test\\target\\spark-test-1.0-SNAPSHOT.jar"))

    val sc = new SparkContext(sparkConf)
    try {
      // `val`, not `var`: the RDD reference is never reassigned.
      val textIn = sc.textFile("hdfs://192.168.163.235:8020/datax-test")
      println(textIn.count())
      // BUG FIX: rdd.foreach(println) runs on the executors, so the lines end up
      // in executor logs, not the driver console. Collect to the driver first —
      // acceptable here only because this is a small test file.
      textIn.collect().foreach(println)
    } finally {
      // Always release the SparkContext (and the YARN application), even on failure.
      sc.stop()
    }
  }
}
